Explore those patients who have had repeat RRT events during an encounter



In [ ]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as datetime
from impala.util import as_pandas
from impala.dbapi import connect
plt.style.use('ggplot')
%matplotlib notebook



In [ ]:

    
conn = connect(host="mycluster.domain.com", port=my_impala_port_number)
cur = conn.cursor()
cur.execute("use my_db")



In [ ]:

    
query_rrt_counts = """
SELECT enc.encntr_id, COUNT(1) AS count 
FROM encounter enc
INNER JOIN clinical_event ce 
ON enc.encntr_id = ce.encntr_id
WHERE enc.loc_facility_cd='633867'
AND enc.encntr_complete_dt_tm < 4e12
AND ce.event_cd='54411998'
AND ce.result_status_cd NOT IN ('31', '36')
AND ce.valid_until_dt_tm > 4e12
AND ce.event_class_cd not in ('654645')
AND enc.admit_type_cd != '0'
AND enc.encntr_type_class_cd = '391'
GROUP BY enc.encntr_id
ORDER BY enc.encntr_id;
"""



In [ ]:

    
cur.execute(query_rrt_counts)
df = as_pandas(cur)



In [ ]:

    
df.head()



In [ ]:

    
# Number of encounters with multiple RRTs:
len(df[df['count']>1])



In [ ]:

    
# to get just the encounter ids with counts >1
encs = df[df['count']>1]['encntr_id'].get_values()



In [ ]:

    
encs



In [ ]:

    
len(df)



In [ ]:

    
df.sort_values(by='count', ascending=False).head(10)



In [ ]:

    
df[df['count']>1].hist()

Of the all the people with valid & complete RRT events, some of them have multiple RRT events.

BUT: how many are truly multiple events, or mistaken entry, or bad data?

For each encoutner with multiple RRT events:

Check that the RRT events are reasonably spaced out

Check that the RRT events are all after arrival & before enc.departure



In [ ]:

    
# Print out the data related to the encounter with multiple RRT events, if the duration beteween RRT events is less than 1 hour, 
# or if the start & end time of the encounter doesn't make sense related to the time of the RRT event.
count = 0
for enc in encs:
    count+=1
    print "count: {0}".format(count)
    query = """
    SELECT 
          ce.encntr_id
        , ce.event_id
        , ce.valid_until_dt_tm
        , from_unixtime(CAST(ce.event_end_dt_tm / 1000 as bigint)) AS event_end
        , ce.event_end_dt_tm
        , from_unixtime(CAST(ce.valid_from_dt_tm / 1000 as bigint)) AS valid_from
        , from_unixtime(CAST(enc.arrive_dt_tm/1000 as bigint)) AS enc_arrive
        , enc.arrive_dt_tm
        , COALESCE(tci.checkin_dt_tm, enc.arrive_dt_tm) AS check_in_time
        , from_unixtime(CAST(COALESCE(tci.checkin_dt_tm
        , enc.arrive_dt_tm)/1000 as bigint)) AS check_in
        , from_unixtime(CAST(enc.depart_dt_tm/1000 as bigint)) AS enc_depart
        , enc.depart_dt_tm
    FROM clinical_event ce 
    INNER JOIN encounter enc 
    ON ce.encntr_id = enc.encntr_id
    LEFT OUTER JOIN  ( 
                        SELECT  
                            ti.encntr_id AS encntr_id
                          , MIN(tc.checkin_dt_tm) AS checkin_dt_tm
                        FROM tracking_item ti
                        JOIN tracking_checkin tc ON ti.tracking_id = tc.tracking_id
                        GROUP BY ti.encntr_id 
                     ) tci 
    ON tci.encntr_id = enc.encntr_id
    WHERE ce.event_cd = '54411998' AND ce.encntr_id='{0}'
    AND ce.valid_until_dt_tm>4e12;
    """.format(enc)

    cur.execute(query)
    dfenc = as_pandas(cur)

    dfenc['dep-perf_hrs'] = ((dfenc['depart_dt_tm']-dfenc['event_end_dt_tm'])/3600000)
    dfenc['perf-arr_hrs'] = ((dfenc['event_end_dt_tm']-dfenc['check_in_time'])/3600000)

    # if there's a problem, print the dfenc
    if any(dfenc['dep-perf_hrs']<0):
        print dfenc
    if any(dfenc['perf-arr_hrs']<0):
        print dfenc

    RRTtimes = sorted(dfenc['event_end_dt_tm'].get_values())

    time_lastloop = 0
    for RRTtime in RRTtimes:
        if time_lastloop == 0:
            time_lastloop = RRTtime
        else:
            if (RRTtime-time_lastloop)/3600000. < 1:
                print dfenc



In [ ]:

    
# Several of these have elapsed intervals under 1 hour. ==> Very unlikely.
# Sometimes, info can get entered twice into the system, esp if there is a shift change.



In [ ]:

    
dfenc